/* Runs econometric estimators for the RD data. Merges with nonexpert and
expert survey results.
*/

cap log close
log using "${logs}/rd_reg_micro.log", replace

local count = 1
foreach file in  "ADGP1" "BDGP2" "CDGP3" "DDGP4" "EDGP5" "FDGP6" ///
			"GDGP7" "HDGP8" "IDGP9" "JDGP10" "KDGP11" {	
	* Open coefficients file
	use "${dat}/DGP/`file'_Coefficients.dta", clear
	scalar B0 = cst in 1
	scalar B1 = x in 1
	scalar B2 = x2 in 1
	scalar B3 = x3 in 1
	scalar B4 = x4 in 1
	scalar B5 = x5 in 1
	scalar D1 = x in 2
	scalar D2 = x2 in 2
	scalar D3 = x3 in 2
	scalar D4 = x4 in 2
	scalar D5 = x5 in 2
	scalar sigma = res_std_global in 1
	
	foreach jump in -15 -9 -54 -324 -1944 0 1944 324 54 9 15 {
		forvalues sd = 1/8 {
		di "`file'"
		di `jump'
		di `sd'
	*Open relevant dataset:
	use "${sim_dat}/`file'_`jump'_`sd'.dta", clear
	
	*Merge in theoretical bandwidths, asymptotic biases
	gen playerdgp = "`file'"
	merge m:1 playerdgp using "${dat}/dgp_theoretical_bandwidths_biases.dta", nogen
		
	*Piecewise quintic regression:
	*Generate treatment variable:
	gen D = 0
	replace D = 1 if x >= 0
	
	*Create interaction terms:
	forvalues num = 2/5{
	gen x`num' = x^(`num')	
	}
	gen dx = D*x
	gen dx2 = D*x2
	gen dx3 = D*x3
	gen dx4 = D*x4
	gen dx5 = D*x5
	
	*Piecewise quintic regression. Coefficient on D is treatment effect.
	reg y x x2 x3 x4 x5 D dx dx2 dx3 dx4 dx5
	
	*Store treatment effect in results matrix:
	local D =  _b[D]

	local D_cl = _b[D] - 1.96 * _se[D]
	local D_cu = _b[D] + 1.96 * _se[D]
	local D_se = _se[D]

	*Using rdrobust with MSE bandwidth
	rdrobust y x, bwselect(mserd) all
	
	local rd_c_mse = e(tau_cl)
	local rd_c_cl_mse = e(tau_cl) - 1.96 *  e(se_tau_cl) 
	local rd_c_cu_mse = e(tau_cl) + 1.96 *  e(se_tau_cl) 
	local rd_se_mse = e(se_tau_cl)
	local rd_c_mse_bw = e(h_l)
	
	*Robust std. err.
	local rd_r_cl_mse = e(tau_bc) - 1.96 *  e(se_tau_rb)
	local rd_r_cu_mse =  e(tau_bc) + 1.96 *  e(se_tau_rb) 
	local rd_r_se_mse = e(se_tau_rb)
	local rd_bc_mse = e(tau_bc)
	
	*Store additional quantities:
	local main_bw_mse = e(h_l)
	local obs_l = e(N_l)
	local obs_r = e(N_r)
	local obs_eff_l_mse = e(N_h_l)
	local obs_eff_r_mse = e(N_h_r)
	
	*Using rdrobust with IK bandwidth
	*First calculate and store IK bandwidth:
	altrdbwselect y x,  bwselect(IK)
	local h_micro = e(h_IK)
	local b_micro = e(b_IK)
	
	rdrobust y x, h(`h_micro') b(`b_micro') all
	local rd_c_ik = e(tau_cl)
	local rd_c_se_ik = e(se_tau_cl)
	local rd_c_cl_ik = e(tau_cl) - 1.96 *  e(se_tau_cl) 
	local rd_c_cu_ik = e(tau_cl) + 1.96 *  e(se_tau_cl) 
	
	*Robust std. err.
	local rd_r_cl_ik = e(tau_bc) - 1.96 *  e(se_tau_rb)
	local rd_r_cu_ik =  e(tau_bc) + 1.96 *  e(se_tau_rb) 
	
	*Store additional quantities:
	local main_bw_ik = e(h_l)
	local obs_eff_l_ik = e(N_h_l)
	local obs_eff_r_ik = e(N_h_r)
	
	*IK with theoretically optimal bandwidth
	local theor_optim_bw = theor_optim_bw in 1
	rdrobust y x, h(`theor_optim_bw') all
	local rd_c_ik_theor_bw = e(tau_cl)
	local rd_c_se_ik_theor_bw = e(se_tau_cl)
	local rd_c_cl_ik_theor_bw = e(tau_cl) - 1.96 * e(se_tau_cl)
	local rd_c_cu_ik_theor_bw = e(tau_cl) + 1.96 * e(se_tau_cl)
	
	*CCT with theoretically optimal bandwidth
	*Also correct the conventional estimates by shifting by the theoretical bias
	local bias = theor_asymp_bias in 1
	local rd_c_cct_theor_bw = e(tau_cl) - (`bias')
	local rd_c_se_cct_theor_bw = e(se_tau_cl)
	local rd_c_cl_cct_theor_bw = e(tau_cl) - 1.96 * e(se_tau_cl) - (`bias')
	local rd_c_cu_cct_theor_bw = e(tau_cl) + 1.96 * e(se_tau_cl) - (`bias')
	local rd_c_bias_cct_theor_bw = e(tau_cl) - e(tau_bc)
	
	* CCT robust with theoretically optimal bandwidth
	local rd_r_cct_theor_bw = e(tau_bc)
	local rd_r_se_cct_theor_bw = e(se_tau_rb)
	local rd_r_cl_cct_theor_bw = e(tau_bc) - 1.96 * e(se_tau_rb)
	local rd_r_cu_cct_theor_bw = e(tau_bc) + 1.96 * e(se_tau_rb)
	
	* rdrobust with CER-optimal bandwidth
	rdrobust y x, bwselect(cerrd) all
	
	local rd_c_cer = e(tau_cl)
	local rd_c_cl_cer = e(tau_cl) - 1.96 *  e(se_tau_cl) 
	local rd_c_cu_cer = e(tau_cl) + 1.96 *  e(se_tau_cl) 
	local rd_se_cer = e(se_tau_cl)
	
	*Robust std. err.
	local rd_r_cl_cer = e(tau_bc) - 1.96 *  e(se_tau_rb)
	local rd_r_cu_cer =  e(tau_bc) + 1.96 *  e(se_tau_rb) 
	local rd_r_se_cer = e(se_tau_rb)
	local rd_bc_cer = e(tau_bc)
	
	*Store additional quantities:
	local main_bw_cer = e(h_l)
	local obs_eff_l_cer = e(N_h_l)
	local obs_eff_r_cer = e(N_h_r)
	
	* Neyman-Pearson
	gen ytilde = y - B0
	replace ytilde = ytilde - B1*x - B2*x2 - B3*x3 - B4*x4 - B5*x5 if x < 0
	replace ytilde = ytilde - D1*x - D2*x2 - D3*x3 - D4*x4 - D5*x5 if x >= 0
	sum ytilde if x >= 0
	local np_tstat = abs(r(mean)) / sqrt(sigma^2 / r(N))
	local np_reject = abs(`np_tstat') > 1.96
	
	clear
	set obs 1
	*Store results:
	gen file = "`file'_`jump'_`sd'"
	
	*Store microdata results:
	
	*Quintic piecewise regression estimates
	gen D = `D'
	label variable D "Piece-wise quintic estimate"
	gen D_se = `D_se'
	label variable D_se "Piece-wise quintic std. err."
	gen D_cl = `D_cl'
	label variable D_cl "Piece-wise quintic lower CI"
	gen D_cu = `D_cu'
	label variable D_cu "Piece-wise quintic upper CI"

	*rdrobust microdata estimates using the MSE bandwidth selector
	*Conventional estimates
	gen rd_c_mse = `rd_c_mse'
	label variable rd_c_mse "Conventional estimate using MSE bw"
	gen rd_c_cl_mse = `rd_c_cl_mse'
	label variable rd_c_cl_mse "Conventional lower CI using MSE bw"
	gen rd_c_cu_mse = `rd_c_cu_mse'
	label variable rd_c_cu_mse "Conventional upper CI using MSE bw"
	gen rd_se_mse = `rd_se_mse'
	label variable rd_se_mse "rdrobust std. err. (conventional)"
	gen rd_c_mse_bw = `rd_c_mse_bw'
	label variable rd_c_mse_bw "rdrobust bandwidth"

	*rdrobust
	gen rd_bc_mse = `rd_bc_mse'
	label variable rd_bc_mse "Bias-corrected estimate using MSE bw"
	gen rd_r_cl_mse = `rd_r_cl_mse'
	label variable rd_r_cl_mse "Robust std. err. lower CI using MSE bw"
	gen rd_r_cu_mse = `rd_r_cu_mse'
	label variable rd_r_cu_mse "Robust std. err. upper CI using MSE bw"
	gen rd_r_se_mse = `rd_r_se_mse'
	label variable rd_r_se_mse "rdrobust std. err. (robust, MSE)"
	
	*rdrobust microdata estimates using the IK bandwidth selector:
	*Conventional estimates:
	gen rd_c_ik = `rd_c_ik'
	label variable rd_c_ik "Conventional estimate using IK bw"
	gen rd_c_cl_ik = `rd_c_cl_ik'
	label variable rd_c_cl_ik "Conventional lower CI using IK bw"
	gen rd_c_cu_ik = `rd_c_cu_ik'
	label variable rd_c_cu_ik "Conventional upper CI using MSE bw"
	gen rd_c_se_ik = `rd_c_se_ik'
	label variable rd_c_se_ik "Conventional s.e. using IK bw"
	gen rd_c_ik_bw = `h_micro'
	label variable rd_c_ik_bw "IK bandwidth"
	
	*Robust ci ik bw	
	gen rd_r_cl_ik = `rd_r_cl_ik'
	label variable rd_r_cl_ik "Robust std. err. lower CI using IK bw"
	gen rd_r_cu_ik = `rd_r_cu_ik'
	label variable rd_r_cu_ik "Robust std. err. upper CI using IK bw"
	
	* rdrobust microdata estimates using the theoretical bandwidth and "IK" conventional procedure
	gen rd_c_ik_theor_bw = `rd_c_ik_theor_bw'
	label variable rd_c_ik_theor_bw "Conventional IK estimate using theoretical bw"
	gen rd_c_cl_ik_theor_bw = `rd_c_cl_ik_theor_bw'
	label variable rd_c_cl_ik_theor_bw "Conventional lower CI using IK procedure, theoretical bw"
	gen rd_c_cu_ik_theor_bw = `rd_c_cu_ik_theor_bw'
	label variable rd_c_cu_ik_theor_bw "Conventional upper CI using IK procedure, theoretical bw"
	gen rd_c_se_ik_theor_bw = `rd_c_se_ik_theor_bw'
	label variable rd_c_se_ik_theor_bw "Conventional s.e. using IK procedure, theoretical bw"
	
	* rdrobust microdata estimates using the theoretical bandwidth, biases and CCT conventional procedure
	gen rd_c_cct_theor_bw = `rd_c_cct_theor_bw'
	label variable rd_c_cct_theor_bw "Conventional CCT estimate using theoretical bw"
	gen rd_c_cl_cct_theor_bw = `rd_c_cl_cct_theor_bw'
	label variable rd_c_cl_cct_theor_bw "Conventional lower CI using CCT procedure, theoretical bw"
	gen rd_c_cu_cct_theor_bw = `rd_c_cu_cct_theor_bw'
	label variable rd_c_cu_cct_theor_bw "Conventional upper CI using CCT procedure, theoretical bw"
	gen rd_c_se_cct_theor_bw = `rd_c_se_cct_theor_bw'
	label variable rd_c_se_cct_theor_bw "Conventional s.e. using CCT procedure, theoretical bw"
	
	* rdrobust microdata estimates using the theoretical bandwidth and CCT robust procedure (and bias-correction)
	gen rd_r_cct_theor_bw = `rd_r_cct_theor_bw'
	label variable rd_r_cct_theor_bw "Bias-corrected CCT estimate using theoretical bw"
	gen rd_r_cl_cct_theor_bw = `rd_r_cl_cct_theor_bw'
	label variable rd_r_cl_cct_theor_bw "Robust lower CI using CCT procedure, theoretical bw"
	gen rd_r_cu_cct_theor_bw = `rd_r_cu_cct_theor_bw'
	label variable rd_r_cu_cct_theor_bw "Robust upper CI using CCT procedure, theoretical bw"
	gen rd_r_se_cct_theor_bw = `rd_r_se_cct_theor_bw'
	label variable rd_r_se_cct_theor_bw "Robust s.e. using CCT procedure, theoretical bw"
	
	*rdrobust microdata estimates using the CER bandwidth selector
	*Conventional estimates
	gen rd_c_cer = `rd_c_cer'
	label variable rd_c_cer "Conventional estimate using CER bw"
	gen rd_c_cl_cer = `rd_c_cl_cer'
	label variable rd_c_cl_cer "Conventional lower CI using CER bw"
	gen rd_c_cu_cer = `rd_c_cu_cer'
	label variable rd_c_cu_cer "Conventional upper CI using CER bw"
	gen rd_se_cer = `rd_se_cer'
	label variable rd_se_cer "rdrobust std. err. (conventional, CER)"

	*rdrobust
	gen rd_bc_cer = `rd_bc_cer'
	label variable rd_bc_cer "Bias-corrected estimate using CER bw"
	gen rd_r_cl_cer = `rd_r_cl_cer'
	label variable rd_r_cl_cer "Robust std. err. lower CI using CER bw"
	gen rd_r_cu_cer = `rd_r_cu_cer'
	label variable rd_r_cu_cer "Robust std. err. upper CI using CER bw"
	gen rd_r_se_cer = `rd_r_se_cer'
	label variable rd_r_se_cer "rdrobust std. err. (robust, CER)"
	
	* Neyman-Pearson
	gen np_tstat = `np_tstat'
	label variable np_tstat "Neyman-Pearson t-statistic"
	gen np_reject = `np_reject'
	label variable np_reject "Neyman-Pearson rejection decision"

	*Store additional quantities:
	gen theor_optim_bw = `theor_optim_bw'
	label variable theor_optim_bw "Theoretically optimal bandwidth"
	gen asymp_bias = `bias'
	label variable asymp_bias "Asymptotic bias"
	gen main_bw_mse = `main_bw_mse'
	label variable main_bw_mse "Main microdata mse bandwidth used"
	gen obs_l = `obs_l'
	label variable obs_l "Actual observations below threshold"
	gen obs_r = `obs_r'
	label variable obs_r "Actual observations above threshold"
	gen obs_eff_l_mse = `obs_eff_l_mse'
	label variable obs_eff_l_mse "Observations used below threshold using MSE bw"
	gen obs_eff_r_mse = `obs_eff_r_mse'
	label variable obs_eff_r_mse "Observations used above threshold using MSE bw"
	gen rd_c_bias_cct_theor_bw = `rd_c_bias_cct_theor_bw'
	label variable rd_c_bias_cct_theor_bw "CCT estimated bias when using theoretical bw"
	
	gen main_bw_ik = `main_bw_ik'
	label variable main_bw_ik "Main microdata IK bandwidth used"
	gen obs_eff_l_ik = `obs_eff_l_ik'
	label variable obs_eff_l_ik "Observations used below threshold using IK bw"
	gen obs_eff_r_ik = `obs_eff_r_ik'
	label variable obs_eff_r_ik "Observations used above threshold using IK bw"

	macro drop rd_r_se_mse
	foreach var2 in D D_se D_cl D_cu rd_c_mse rd_c_cl_mse rd_c_cu_mse rd_r_cl_mse rd_se_mse rd_r_cu_mse rd_c_cer rd_c_cl_cer rd_c_cu_cer rd_r_cl_cer rd_se_cer rd_r_cu_cer rd_c_ik rd_c_cl_ik rd_c_cu_ik rd_r_cl_ik rd_r_cu_ik np_tstat np_reject main_bw_mse obs_l obs_r obs_eff_l_mse obs_eff_r_mse main_bw_ik obs_eff_l_ik obs_eff_r_ik main_bw_cer obs_eff_l_cer obs_eff_r_cer {
		macro drop `var2'
	}
	
	*Store in temporary file
	tempfile regresult`count'
	
	save "`regresult`count''"
	local count = `count' + 1
}
}
}

local count = `count' - 1

di `count'

use "`regresult1'", clear
forvalues num = 2/`count' {

	append using "`regresult`num''"
	
}

* Merge in other estimates
merge 1:1 file using "${dat}/rdhonest_estimates.dta", nogen

label variable file "Dataset"
label variable disc "Discontinuity magnitude indicator"
label variable rdh_taylor_true_estimate "RDHonest discontinuity point estimate (w/ true deriv bound)"
label variable rdh_taylor_true_sd "RDHonest SD of discontinuity point estimate (w/ true deriv bound)"
label variable rdh_taylor_true_lower "RDHonest 95% CI lower bound (w/ true deriv bound)"
label variable rdh_taylor_true_upper "RDHonest 95% CI upper bound (w/ true deriv bound)"
label variable rdh_taylor_true_reject "RDHonest reject null indicator (p=0.05, w/ true deriv bound)"
label variable rdh_taylor_true_bound "True Taylor bound on DGP's 2nd deriv (requires omniscience)"
label variable rdh_taylor_rot_estimate "RDHonest discontinuity point estimate (w/ rule-of-thumb deriv bound)"
label variable rdh_taylor_rot_sd "RDHonest SD of discontinuity point estimate (w/ ROT deriv bound)"
label variable rdh_taylor_rot_lower "RDHonest 95% CI lower bound (w/ ROT deriv bound)"
label variable rdh_taylor_rot_upper "RDHonest 95% CI upper bound (w/ ROT deriv bound)"
label variable rdh_taylor_rot_reject "RDHonest reject null indicator (p=0.05, w/ ROT deriv bound"
label variable rdh_taylor_rot_bound "Rule-of-thumb Taylor bound on DGP's 2nd deriv"

save "${dat}/rd_estimates_micro.dta", replace

** data from phase 1 **
use "${dat}/Pilot 1.0/survey_merged.dta", clear

gen repeat_participant = 2
gen phase = 1
keep pid gid playerdgp playerdisc playergraph_seed abs_disc y repeat_participant attention phase playertreatment_group playerchosenStakes playertime_spent_on_question

tempfile phase1
save "`phase1'"

** data from phase 2 **
use "${dat}/Phase II/survey_merged_PhaseII.dta", clear

gen phase = 2
keep pid gid playerdgp playerdisc playergraph_seed abs_disc y repeat_participant attention phase playertreatment_group playerchosenStakes playertime_spent_on_question

tempfile phase2
save "`phase2'"

** data from phase 3 **
use "${dat}/Phase III/survey_merged_PhaseIII.dta", clear

gen phase = 3
keep pid gid playerdgp playerdisc playergraph_seed abs_disc y repeat_participant attention phase playertreatment_group playerchosenStakes playertime_spent_on_question

tempfile phase3
save "`phase3'"

** data from phase 6 **
use "${dat}/Phase VI/survey_merged_PhaseVI.dta", clear

gen phase = 6
keep pid gid playerdgp playerdisc playergraph_seed abs_disc y repeat_participant attention phase playertreatment_group playerchosenStakes playertime_spent_on_question

tempfile phase6
save "`phase6'"

use "`phase1'", clear
append using "`phase2'"
append using "`phase3'"
append using "`phase6'"

*Create merge variable
split gid, p("_")

gen file = gid1 + "_" + gid2 + "_" + gid7
gen filename = gid

drop gid1-gid8

merge m:1 file using "${dat}/rd_estimates_micro.dta"

keep if _merge==3
drop _merge

cap label variable repeat_participant "Indicator for whether subject participated in earlier phase of experiment (1=yes)"
cap label variable phase "Phase of non-expert experiments"
cap label variable file "Dataset"
cap label variable filename "Name of graph file"

save "${dat}/survey_merged_rd_estimates_micro.dta", replace

* Experts
use "${dat}/Expert Cornell/Expert Cornell part1.dta", clear
append using "${dat}/Expert Irvine/Expert Irvine part1.dta"
gen seminar = 1
append using "${dat}/Expert Jul 2019 Pilot/Expert Jul 2019 Pilot part1.dta"
append using "${dat}/Expert Aug 2019 Pilot/Expert Aug 2019 Pilot part1.dta"
append using "${dat}/Expert Aug 2019 First Round/Expert Aug 2019 First Round part1.dta"
replace seminar = 0 if seminar != 1
*Create merge variable
split gid, p("_")

gen file = gid1 + "_" + gid2 + "_" + gid7
gen filename = gid

drop gid1-gid8

merge m:1 filename using "${dat}/asymptotic_variances_rdd.dta", keep(master match)
drop _merge

merge m:1 file using "${dat}/rd_estimates_micro.dta"

keep if _merge==3
drop _merge
gen playertreatment_group = "A"


cap label variable seminar "Indicator for whether expert player took survey during an in-person seminar (1=yes)"
cap label variable file "Dataset"
cap label variable filename "Name of graph file"
cap label variable playertreatment_group "Graphical treatment seen"

save "${dat}/expert_merged_rd_estimates_micro.dta", replace

log close
